Check working directory

getwd()
## [1] "/Users/alexg/R files/hair_cortisol/skew-normal FINAL"

Load packages

library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
##   method          from  
##   plot.transform  scales
##   print.transform scales
## 
## Attaching package: 'dlookr'
## The following object is masked from 'package:base':
## 
##     transform
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:dlookr':
## 
##     describe
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()       masks ggplot2::%+%()
## ✖ psych::alpha()     masks ggplot2::alpha()
## ✖ tidyr::expand()    masks reshape::expand()
## ✖ tidyr::extract()   masks dlookr::extract()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ reshape::rename()  masks dplyr::rename()
## ✖ lubridate::stamp() masks reshape::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vioplot)
## Loading required package: sm
## Package 'sm', version 2.2-6.0: type help(sm) for summary information
## 
## Attaching package: 'sm'
## 
## The following object is masked from 'package:dlookr':
## 
##     binning
## 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(vtable)
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## 
## The following object is masked from 'package:psych':
## 
##     cs
## 
## The following object is masked from 'package:stats':
## 
##     ar
library(bayesplot)
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'bayesplot'
## 
## The following object is masked from 'package:brms':
## 
##     rhat
library(bayestestR)
library(loo)
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
library(priorsense)
library(rethinking)
## Loading required package: cmdstanr
## This is cmdstanr version 0.8.0
## - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
## - CmdStan path: /Users/alexg/.cmdstan/cmdstan-2.36.0
## - CmdStan version: 2.36.0
## Loading required package: posterior
## This is posterior version 1.6.1
## 
## Attaching package: 'posterior'
## 
## The following object is masked from 'package:bayesplot':
## 
##     rhat
## 
## The following object is masked from 'package:dlookr':
## 
##     entropy
## 
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## 
## The following objects are masked from 'package:base':
## 
##     %in%, match
## 
## Loading required package: parallel
## rethinking (Version 2.42)
## 
## Attaching package: 'rethinking'
## 
## The following object is masked from 'package:loo':
## 
##     compare
## 
## The following objects are masked from 'package:brms':
## 
##     LOO, stancode, WAIC
## 
## The following object is masked from 'package:purrr':
## 
##     map
## 
## The following objects are masked from 'package:psych':
## 
##     logistic, logit, sim
## 
## The following object is masked from 'package:stats':
## 
##     rstudent

Load data

df <- read_xlsx("hair_cort_dog_all.xlsx", col_types = c("text", "text",  
                               "text", "text", "text", "text",
                               "text", "numeric","text", "skip",
                               "numeric", "skip", "skip", 
                               "numeric", "skip"))
df <- as.data.frame(df)

INITIAL DATA PLOTTING AND EXPLORATION

Check characteristics of df

dim(df) # will tell you how many rows and columns the dataset has
## [1] 73 11
class(df) # will tell you what data structure has the dataset been assigned
## [1] "data.frame"

Explore the dataset to understand its structure.

head(df)
##   number   group visit season breed_group coat_colour    sex age comorbidity
## 1     c1 stopped    v0 winter         ret        dark   Male  43         yes
## 2     c2 stopped    v0 autumn         mix        dark   Male 105         yes
## 3     c3 stopped    v0 spring        ckcs         mix Female 117         yes
## 4     c4 stopped    v0 summer         ret        dark Female 108         yes
## 5     c5 stopped    v0 summer         ret        dark Female 110         yes
## 6     c6 stopped    v0 winter         mix       light Female 120         yes
##   fat_percent cortisol
## 1    52.21393 4.924220
## 2    38.52059 7.304202
## 3    46.94916 1.590000
## 4    44.46813 0.861570
## 5    39.59363 6.217317
## 6          NA 4.426785

1. Get summary stats for numeric data

numeric_df <- Filter(is.numeric, df)
describe(numeric_df) # the describe function in psych provides summary stats
##             vars  n  mean    sd median trimmed   mad   min    max  range  skew
## age            1 73 95.85 35.58 101.00   96.07 28.17 16.00 182.00 166.00 -0.10
## fat_percent    2 55 40.48  7.82  40.86   40.70  5.71 17.86  61.20  43.34 -0.28
## cortisol       3 73  8.11 16.47   2.33    4.07  2.17  0.41 104.62 104.20  3.89
##             kurtosis   se
## age            -0.17 4.16
## fat_percent     0.77 1.05
## cortisol       16.80 1.93

2. Check normality of all numeric variables

a. graphical assessment

plot_normality(numeric_df)

b. shapiro-wilk test

apply(numeric_df, 2, shapiro.test)
## $age
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97361, p-value = 0.1288
## 
## 
## $fat_percent
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97956, p-value = 0.4692
## 
## 
## $cortisol
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.46269, p-value = 6.756e-15

c. repeat Q-Q plots with transformed data

i. log(cortisol)

qqnorm(df$cortisol)
qqline(df$cortisol, col = "red")

qqnorm(log(df$cortisol))
qqline(log(df$cortisol), col = "red")

ii Shapiro test for log cortisol

shapiro.test(log(df$cortisol))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(df$cortisol)
## W = 0.94725, p-value = 0.004126

3. Check cortisol data numerically

a. summarisecortisol and log cortisol

summary(df$cortisol)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.4141   1.4119   2.3331   8.1089   6.8455 104.6172
sd(df$cortisol)
## [1] 16.47372
summary(log(df$cortisol))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503

b. log-transform cortisol variable

df$lgCort <- log(df$cortisol)
summary(df$lgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503
sd(df$lgCort)
## [1] 1.208074

c. visualise lgCort variable

hist(df$lgCort)

d. Check hair cortisol by comorbidity

i. cortisol

describeBy(cortisol ~ comorbidity, data = df)
## 
##  Descriptive statistics by group 
## comorbidity: no
##          vars  n mean   sd median trimmed  mad  min  max range skew kurtosis
## cortisol    1 15 2.31 1.51   1.84    2.17 0.96 0.75 5.65   4.9 1.19     0.24
##            se
## cortisol 0.39
## ------------------------------------------------------------ 
## comorbidity: yes
##          vars  n mean   sd median trimmed  mad  min    max range skew kurtosis
## cortisol    1 58 9.61 18.2   2.82    5.39 2.89 0.41 104.62 104.2  3.4    12.58
##            se
## cortisol 2.39

ii. lgCort

describeBy(lgCort ~ comorbidity, data = df)
## 
##  Descriptive statistics by group 
## comorbidity: no
##        vars  n mean  sd median trimmed  mad   min  max range skew kurtosis   se
## lgCort    1 15 0.66 0.6   0.61    0.65 0.59 -0.29 1.73  2.02 0.23    -0.85 0.16
## ------------------------------------------------------------ 
## comorbidity: yes
##        vars  n mean   sd median trimmed  mad   min  max range skew kurtosis
## lgCort    1 58 1.32 1.29   1.04    1.24 1.35 -0.88 4.65  5.53 0.59    -0.24
##          se
## lgCort 0.17

iii. Calculate mean difference in lgCort for different comorbidity status

diffLgCortCo <- mean(df$lgCort[df$comorbidity == "yes"]) - mean(df$lgCort[df$comorbidity == "no"])
diffLgCortCo
## [1] 0.6535362
exp(diffLgCortCo)
## [1] 1.922327
exp(0.25)
## [1] 1.284025
diffLgCortCo / 1.29
## [1] 0.5066172

4. Generate variables for use in modelling

a. create a binary outcome variable for group, where “stopped” is ref category

df$weight_loss_success <- df$group
df$weight_loss_success <- ifelse(df$weight_loss_success == "completed", "yes", "no")

b. create simple category name for breed and convert to factor

df$breed <- df$breed_group
df$breed <- factor(df$breed, levels = c("mix", "ckcs", "pug", "ret", "other"))
head(df$breed)
## [1] ret  mix  ckcs ret  ret  mix 
## Levels: mix ckcs pug ret other

c. reorder season so spring is reference and rest are in orderd

df$season <- factor(df$season, levels = c("spring", "summer", "autumn", "winter"))
head(df$season)
## [1] winter autumn spring summer summer winter
## Levels: spring summer autumn winter

d. make light hair colour the reference category

df$coat_colour <- factor(df$coat_colour, levels = c("light", "mix", "dark"), ordered = FALSE)
head(df$coat_colour)
## [1] dark  dark  mix   dark  dark  light
## Levels: light mix dark

5. Generate data summary

sumtable(df)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
group 73
… completed 42 58%
… stopped 31 42%
visit 73
… v0 52 71%
… v1 21 29%
season 73
… spring 14 19%
… summer 22 30%
… autumn 21 29%
… winter 16 22%
breed_group 73
… ckcs 7 10%
… mix 16 22%
… other 26 36%
… pug 7 10%
… ret 17 23%
coat_colour 73
… light 27 37%
… mix 16 22%
… dark 30 41%
sex 73
… Female 43 59%
… Male 30 41%
age 73 96 36 16 73 117 182
comorbidity 73
… no 15 21%
… yes 58 79%
fat_percent 55 40 7.8 18 37 45 61
cortisol 73 8.1 16 0.41 1.4 6.8 105
lgCort 73 1.2 1.2 -0.88 0.34 1.9 4.7
weight_loss_success 73
… no 31 42%
… yes 42 58%
breed 73
… mix 16 22%
… ckcs 7 10%
… pug 7 10%
… ret 17 23%
… other 26 36%

6. Visualise associations

a. associtation between cortisol and sex

i. violin plot (vioplot package) for cortisol

par(mfrow = c(1,1))
vioplot(cortisol ~ sex, col = "firebrick",
        data = df)

ii. violin plot (vioplot package) for lgCort

par(mfrow = c(1,1))
vioplot(lgCort ~ sex, col = "lemonchiffon",
        data = df)

b. association between lgCortisol and breed…

i. violin plot (vioplot package) for lgCort

par(mfrow = c(1,1))
vioplot(lgCort ~ breed, col = "firebrick",
        data = df)

iii. stripchart for lgCort

stripchart(lgCort ~ breed, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20)

c. association between lgCortisol and season

i. violin plot (vioplot package) for lgCort

par(mfrow = c(1,1))
vioplot(lgCort ~ season, col = "lemonchiffon",
        data = df)

ii. stripchart for lgCort

stripchart(lgCort ~ season, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20)

d. association between lgCortisol and comorbidity

i. violin plot (vioplot package) for lgCort

par(mfrow = c(1,1))
vioplot(cortisol ~ comorbidity, col = "steelblue",
        data = df)

ii. violin plot (vioplot package) for lgCort

par(mfrow = c(1,1))
vioplot(lgCort ~ comorbidity, col = "steelblue",
        data = df)

e. association between cortisol vs age

iv. basic dot-plot for cortisol vs age

plot(cortisol ~ age, col = "steelblue3", data = df, pch = 20)

iv. basic dot-plot for lgCort vs age

plot(lgCort ~ age, col = "steelblue3", data = df, pch = 20)

STANDARDIZE DATA FOR MODELLING

1. standardize data for Bayesian modelling

a. fat

df$sFat <- standardize(df$fat_percent)
summary(df$sFat)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -2.89165 -0.43568  0.04814  0.00000  0.56366  2.64873       18

b. age

df$sAge <- standardize(df$age)
summary(df$sAge)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.2444 -0.6422  0.1448  0.0000  0.5945  2.4215

c. cortisol

df$slgCort <- standardize(df$lgC)
summary(df$slgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.7079 -0.6925 -0.2768  0.0000  0.6142  2.8713
hist(df$slgCort)

2. create dataset only containing complete data

df2 <- na.omit(df)

Combined dot plot and boxplot for LgCort vs comorbidity

  # plot
  ggplot(data = df2, aes(x = comorbidity)) +
    geom_boxplot(aes(y = slgCort),
                 color = "black", width=0.5, lwd = 1) +
 
   geom_point(aes(y = slgCort), 
             size = 4, color = "purple",
             position = position_jitter(width = 0.2)) +
             scale_shape(solid = FALSE) +

   theme_bw() +
    labs(title = "Log hair cortisol by comorbidity") +
         labs(y = paste0("Log Hair Cortisol (standardised)")) +
         labs(x = paste0("Comorbidity")) +
         theme(axis.title.y = element_text(size=16, face = "bold"), 
               axis.title.x = element_text(size=16, face = "bold"),
               axis.text = element_text(size = 14, face = "bold"),
               title = element_text(size=18, face="bold"),
               plot.title = element_text(hjust = 0.5)) 

MODEL FOR COMORBIDITY ALLOWING UNEQUEAL VARIANCE FOR COMORBIDITY

i. Model code

model <- brm(slgCort ~ comorbidity + sAge + breed + sex + (1 | visit), family = skew_normal(), data = df)

ii. Check what priors need to be set

default_prior(slgCort ~ comorbidity + sAge + breed + sex + (1 | visit),
                   family = skew_normal(),
                   data = df)
##                    prior     class           coef group resp dpar nlpar lb ub
##             normal(0, 4)     alpha                                           
##                   (flat)         b                                           
##                   (flat)         b      breedckcs                            
##                   (flat)         b     breedother                            
##                   (flat)         b       breedpug                            
##                   (flat)         b       breedret                            
##                   (flat)         b comorbidityyes                            
##                   (flat)         b           sAge                            
##                   (flat)         b        sexMale                            
##  student_t(3, -0.3, 2.5) Intercept                                           
##     student_t(3, 0, 2.5)        sd                                       0   
##     student_t(3, 0, 2.5)        sd                visit                  0   
##     student_t(3, 0, 2.5)        sd      Intercept visit                  0   
##     student_t(3, 0, 2.5)     sigma                                       0   
##        source
##       default
##       default
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##       default
##       default
##  (vectorized)
##  (vectorized)
##       default

1. Set priors

NB skip the sigma parameter as this will be determined by the model

# Set individual priors
prior_int <- set_prior("normal(0, 0.5)", class = "Intercept")
prior_b <- set_prior("normal(0, 1)", class = "b")
prior_b_sex <- set_prior("normal(0, 1)", class = "b", coef = "sexMale")
prior_b_co <- set_prior("normal(0.25, 1)", class = "b", coef = "comorbidityyes")
prior_b_sAge <- set_prior("normal(0, 0.5)", class = "b", coef = "sAge")
prior_sd <- set_prior("normal(0, 1)", class = "sd")
prior_alpha <- set_prior("normal(4, 2)", class = "alpha")

# Combine priors into list
priors2 <- c(prior_int, prior_b, prior_b_sex, prior_b_co, prior_b_sAge, prior_sd, prior_alpha)

2. Run model2

Increased adapt_delta >0.8 (0.999 here), as had divergent transitions

set.seed(999)
model2 <- brm(bf(slgCort ~ comorbidity + sAge + breed + sex + (1 | visit),
                 sigma ~ comorbidity),
                   family = skew_normal(),
                   prior = priors2,
                   data = df,
                   control=list(adapt_delta = 0.99999, stepsize = 0.001, max_treedepth =15),
                   iter = 8000, warmup = 2000,
                   cores = 4,
                   save_pars = save_pars(all =TRUE),
                   sample_prior = TRUE)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'

3. get summary of model

summary(model2)
##  Family: skew_normal 
##   Links: mu = identity; sigma = log; alpha = identity 
## Formula: slgCort ~ comorbidity + sAge + breed + sex + (1 | visit) 
##          sigma ~ comorbidity
##    Data: df (Number of observations: 73) 
##   Draws: 4 chains, each with iter = 8000; warmup = 2000; thin = 1;
##          total post-warmup draws = 24000
## 
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.34      0.35     0.01     1.31 1.00     8851     9509
## 
## Regression Coefficients:
##                      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept               -0.59      0.38    -1.35     0.16 1.00    10808
## sigma_Intercept         -0.68      0.23    -1.08    -0.18 1.00    13787
## comorbidityyes           0.65      0.24     0.17     1.11 1.00    15043
## sAge                    -0.14      0.11    -0.34     0.07 1.00    17882
## breedckcs                0.00      0.40    -0.82     0.76 1.00    18541
## breedpug                 0.03      0.38    -0.70     0.78 1.00    12245
## breedret                -0.10      0.31    -0.71     0.50 1.00    14011
## breedother               0.20      0.31    -0.39     0.81 1.00    12118
## sexMale                  0.06      0.22    -0.36     0.49 1.00    19543
## sigma_comorbidityyes     0.77      0.25     0.24     1.23 1.00    15288
##                      Tail_ESS
## Intercept               13055
## sigma_Intercept         11930
## comorbidityyes          15476
## sAge                    15999
## breedckcs               16878
## breedpug                15716
## breedret                16586
## breedother              15436
## sexMale                 16482
## sigma_comorbidityyes    13778
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## alpha     3.13      1.30     0.81     6.00 1.00    16416    13614
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

4. MCMC diagnostics

a. check distriitions and trace plots

plot(model2)

Looking for hairy caterpillars

b. try a trank plot as well

mcmc_plot(model2, type = 'rank_overlay')

5. calculate 97% HPDI for comorbiity

Usually better than the compatability intervals given in the summary

draws <- as.matrix(model2)
HPDI(draws[,3], 0.97) # 3rd column is draws for comorbidity
##     |0.97     0.97| 
## 0.1336725 1.1887329

6. Calculate R2 for model

bayes_R2(model2, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
##     Estimate  Est.Error       Q1.5       Q50     Q98.5
## R2 0.1338352 0.04771195 0.04447219 0.1301455 0.2476114
loo_R2(model2, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
##       Estimate  Est.Error       Q1.5         Q50     Q98.5
## R2 -0.06298951 0.07628151 -0.2495848 -0.05938339 0.0881158

CHECKS ON MODEL

1. basic check of simulations based on posterior distribution, versus the real data distribution

checks whether actual data is similar to simulated data.

pp_check(model2, ndraws = 100) 

2. check some individual draws versus observed using pp_check

par(mfrow = c(1,1))
pp_check(model2, type = "hist", ndraws = 11, binwidth = 0.25) # separate histograms of 11 MCMC draws vs actual data

3. other pp_check graphs

pp_check(model2, type = "error_hist", ndraws = 11, , binwidth = 0.25) # separate histograms of errors for 11 draws

pp_check(model2, type = "scatter_avg", ndraws = 100) # scatter plot

pp_check(model2, type = "stat_2d") #  scatterplot of joint posteriors
## Using all posterior draws for ppc type 'stat_2d' by default.
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.

4. pairs plot

pairs(model2)

5. try the PSIS LOO-CV procedure to check model performance

a. run the loo function

loo_model2 <- loo(model2, moment_match = TRUE)
loo_model2
## 
## Computed from 24000 by 73 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -102.6  7.0
## p_loo        10.7  2.3
## looic       205.3 14.0
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.6, 1.2]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

AUTOMATED PRIOR SENSITIVITY USING THE PRIOR SENSE PACKAGE

1. Try automated prior sensitivity analysis using the prior sense package

First, check the sensitivity of the prior and likelihood to power-scaling. Posterior and posteriors resulting from power-scaling.

powerscale_sensitivity(model2, 
                       variable = c("b_Intercept",
                                    "b_sAge",
                                    "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret",
                                    "b_sexMale",
                                    "b_comorbidityyes"))
## Sensitivity based on cjs_dist
## Prior selection: all priors
## Likelihood selection: all data
## 
##          variable prior likelihood diagnosis
##       b_Intercept 0.046      0.051         -
##            b_sAge 0.012      0.092         -
##       b_breedckcs 0.019      0.072         -
##      b_breedother 0.036      0.064         -
##        b_breedpug 0.032      0.076         -
##        b_breedret 0.019      0.073         -
##         b_sexMale 0.014      0.092         -
##  b_comorbidityyes 0.021      0.115         -

2. Now use bayestestR package to check priors are informative

check_prior(model2, effects = "all")
##             Parameter Prior_Quality
## 1         b_Intercept   informative
## 2    b_comorbidityyes   informative
## 3              b_sAge   informative
## 4         b_breedckcs   informative
## 5          b_breedpug   informative
## 6          b_breedret   informative
## 7        b_breedother   informative
## 8           b_sexMale   informative
## 9 sd_visit__Intercept   informative

CHECK PRIOR PREDICTION LINES FROM FINAL MODEL

1. obtain draws of priors from final model

prior <- prior_draws(model2)
prior %>% glimpse()
## Rows: 24,000
## Columns: 11
## $ Intercept        <dbl> 0.49834141, 1.00992523, -0.02432336, -0.19580010, 0.4…
## $ b_comorbidityyes <dbl> 1.39106604, -1.84689682, 0.45871513, -0.87675773, -0.…
## $ b_sAge           <dbl> -0.74349270, -0.46312555, -0.01509912, -0.25089330, -…
## $ b_breedckcs      <dbl> -0.24004745, -0.94500266, -1.62020698, -0.65152734, -…
## $ b_breedpug       <dbl> 0.27830899, -1.73559112, 1.15388048, -0.39574612, 0.3…
## $ b_breedret       <dbl> 0.156605962, 0.009103568, -0.149236077, -1.075677793,…
## $ b_breedother     <dbl> 0.99324366, 0.48959724, -1.61858092, -1.03557625, 0.1…
## $ b_sexMale        <dbl> 0.085446157, 0.241017120, -1.154937454, -0.105595594,…
## $ Intercept_sigma  <dbl> -0.48046410, -2.91969964, -1.46334506, 4.45054685, 0.…
## $ alpha            <dbl> 3.15414988, 3.38138373, 4.84185186, 3.29769979, 5.266…
## $ sd_visit         <dbl> 0.15417643, 0.28194438, 1.82528139, 0.11231738, 0.321…

2. plot prior predictions

a. prior prediction lines for Age

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(-2, 2)) %>% 
  mutate(c = Intercept + b_sAge * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  labs(x = "Age (std)",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-4, 4)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

b. plot prior predictions comorbidity (yes)

i. line plot

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(-2, 2)) %>% 
  mutate(c = Intercept + b_comorbidityyes * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "comorbidity",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-4, 4)) +
  theme_bw() +
  theme(panel.grid = element_blank())

ii. combined box & whisker with dot plot for comorbidity from prior

# set seed for repeatability
set.seed(666)

# create a new dataframe with 50 draws each from the prior
prior_50 <- prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_comorbidityyes * a) %>% 
  mutate(comorbidity = ifelse(a == 0, "no", "yes"))
  

# plot
  ggplot(data = prior_50, aes(x = comorbidity)) +
    
  # add a horizontal line at y=0
    geom_hline(yintercept=0, linetype="dashed", 
                color = "grey", size=1) +
    
  # create boxplot
   geom_boxplot(aes(y = c),
                 color = "black", width=0.75, lwd = 1) +

     
  # change the y-axis limits
    ylim(-2, 2.5) +
   
  # add points
   geom_point(aes(y = c), 
             size = 4, color = "firebrick3",
             position = position_jitter(width = 0.3)) +
             scale_shape(solid = FALSE) +

   theme_bw() +
   labs(title = "Draws from prior for effect of comorbidity on hair cortisol") +
         labs(y = paste0("Log Hair Cortisol (standardised)"),
              x = paste("Comorbidity")) +  
   theme(axis.title.y = element_text(size=12, face="bold"), 

            title = element_text(size=12, face="bold"),
            plot.title = element_text(hjust = 0.5),
            axis.text.x = element_text(color = "grey50", size = 12),
            axis.text.y = element_text(color = "grey8",size = 12))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

iii. combined box & whisker with dot plot for comorbidity from posterior

# set seed for repeatability
set.seed(666)

draws <- as.data.frame(draws)
# create a new dataframe with 50 draws each from the prior
draws_50 <- draws %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_comorbidityyes * a) %>% 
  mutate(comorbidity = ifelse(a == 0, "no", "yes"))
  

# plot
  ggplot(data = draws_50, aes(x = comorbidity)) +
    
 # add a horizontal line at y=0
    geom_hline(yintercept=0, linetype="dashed", 
                color = "grey", size=1) +
  
 # create boxplot
   geom_boxplot(aes(y = c),
                 color = "black", width=0.75, lwd =1) +

# change the y-axis limits
    ylim(-2, 2.5) +
  
# add points
   geom_point(aes(y = c), 
             size = 4, color = "steelblue3",
             position = position_jitter(width = 0.3)) +
             scale_shape(solid = FALSE) +
    
   theme_bw() +
   labs(title = "Draws from posterior for effect of comorbidity on hair cortisol") +
         labs(y = paste0("Log Hair Cortisol (standardised)"),
              x = paste("Comorbidity")) +  
   theme(axis.title.y = element_text(size=12, face="bold"), 

            title = element_text(size=12, face="bold"),
            plot.title = element_text(hjust = 0.5),
            axis.text.x = element_text(color = "grey50", size = 12),
            axis.text.y = element_text(color = "grey8",size = 12))

c. plot prior predictions for sex (Male)

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_sexMale * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Sex breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-4, 4)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

d. Plot prior predictions for breedckcs

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedckcs * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "CKCS breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-4, 4)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

e. Plot prior predictions for breedother

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedother * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Other breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-4, 4)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

f. Plot prior predictions for breedpug

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedpug * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Pug breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-4, 4)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

g. Plot prior predictions for breedret

set.seed(5)

prior %>% 
  slice_sample(n = 50) %>% 
  rownames_to_column("draw") %>% 
  expand_grid(a = c(0, 1)) %>% 
  mutate(c = Intercept + b_breedpug * a) %>% 
  
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  labs(x = "Pug breed",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-4, 4)) +
  theme_bw() +
  theme(panel.grid = element_blank()) 

MANUAL POSTERIOR PREDICTIVE DISTRIBUTION CHECKS

NB Uses posterior_predict

1. Posterior predictive distribition plots for a categorical predictor variable (comorbidity)

# use posterior predict to simulate predictions
ppd <- posterior_predict(model2) 

par(mfrow = c(2,2))
vioplot(slgCort ~ comorbidity, data = df, main = "Observed", col = "steelblue3")
vioplot(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ df$comorbidity, main = "PPD", col = "firebrick3")
vioplot(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ df$comorbidity, main = "PPD", col = "firebrick3")
vioplot(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ df$comorbidity, main = "PPD", col = "firebrick3")

2. Posterior predictive distribition plots for a categorical predictor variable (sex)

par(mfrow = c(2,2))
vioplot(slgCort ~ sex, data = df, main = "Observed", col = "steelblue")
vioplot(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ df$sex, main = "PPD", col = "firebrick3")
vioplot(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ df$sex, main = "PPD", col = "firebrick3")
vioplot(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ df$sex, main = "PPD", col = "firebrick3")

3. Posterior predictive distribition plots for a categorical predictor variable with small group size

par(mfrow = c(2,2))
stripchart(slgCort ~ breed, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20, main = "Observed")
stripchart(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ breed, vertical = TRUE, method = "jitter",
           col = "firebrick3", data = df, pch = 20, main = "PPD")
stripchart(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ breed, vertical = TRUE, method = "jitter",
           col = "firebrick3", data = df, pch = 20, main = "PPD")
stripchart(ppd[sample(seq(1, dim(ppd)[1]), 1),] ~ breed, vertical = TRUE, method = "jitter",
           col = "firebrick3", data = df, pch = 20, main = "PPD")

ANALYSING THE POSTERIOR DISTRIBUTION

1. plot conditional effects from model

a. all conditional effects

plot(conditional_effects(model2), ask = FALSE)

a. conditional effect of comorbidity only

ce <- conditional_effects(model2, effects = "comorbidity", method = "posterior_predict")
ce_df <- ce[[1]][c(1, 9:12)]

ggplot(ce_df, aes(x=comorbidity, y=estimate__, group=1)) +
    geom_errorbar(width=.1, aes(ymin=lower__, ymax=upper__), colour=c("#F8766D", "#00BFC4"), linewidth = 2) +
    geom_point(shape=21, size=10, fill=c("#F8766D", "#00BFC4")) +
   theme_bw() +
    labs(title = "Conditional effect of comorbidity on hair cortisol") +
         labs(y = paste0("Log Hair Cortisol (standardised)")) +
         labs(x = paste0("Comorbidity")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey50", size = 12))

2. mcmc_plot of model

a.just parameters of beta variables and also sigma for comorbidity

mcmc_plot(model2,
          variable = c("b_comorbidityyes",
         "b_sAge",
         "b_breedckcs",
         "b_breedpug",
         "b_breedret",
         "b_breedother",
         "b_sigma_comorbidityyes"))

b. all parameters except nu

mcmc_plot(model2,
          variable = c("Intercept",
         "b_Intercept", "alpha",
         "b_sigma_Intercept",
         "b_comorbidityyes",
         "b_sAge",
         "b_sexMale",
         "b_breedckcs",
         "b_breedpug",
         "b_breedret",
         "b_breedother",
         "b_sigma_comorbidityyes"))

model2$fit
## Inference for Stan model: anon_model.
## 4 chains, each with iter=8000; warmup=2000; thin=1; 
## post-warmup draws per chain=6000, total post-warmup draws=24000.
## 
##                           mean se_mean   sd    2.5%     25%     50%     75%
## b_Intercept              -0.59    0.00 0.38   -1.35   -0.83   -0.59   -0.35
## b_sigma_Intercept        -0.68    0.00 0.23   -1.08   -0.84   -0.69   -0.53
## b_comorbidityyes          0.65    0.00 0.24    0.17    0.49    0.65    0.81
## b_sAge                   -0.14    0.00 0.11   -0.34   -0.21   -0.14   -0.07
## b_breedckcs               0.00    0.00 0.40   -0.82   -0.26    0.01    0.27
## b_breedpug                0.03    0.00 0.38   -0.70   -0.23    0.03    0.28
## b_breedret               -0.10    0.00 0.31   -0.71   -0.30   -0.10    0.11
## b_breedother              0.20    0.00 0.31   -0.39   -0.01    0.19    0.40
## b_sexMale                 0.06    0.00 0.22   -0.36   -0.08    0.06    0.21
## b_sigma_comorbidityyes    0.77    0.00 0.25    0.24    0.61    0.79    0.95
## sd_visit__Intercept       0.34    0.00 0.35    0.01    0.09    0.22    0.46
## alpha                     3.13    0.01 1.30    0.81    2.27    3.01    3.88
## Intercept                 0.00    0.00 0.24   -0.50   -0.13    0.00    0.13
## Intercept_sigma          -0.06    0.00 0.10   -0.24   -0.13   -0.06    0.00
## r_visit[v0,Intercept]     0.01    0.00 0.23   -0.49   -0.08    0.00    0.10
## r_visit[v1,Intercept]    -0.01    0.00 0.24   -0.53   -0.10    0.00    0.08
## prior_Intercept           0.00    0.00 0.50   -1.00   -0.34    0.00    0.33
## prior_b_comorbidityyes    0.25    0.01 0.99   -1.71   -0.43    0.26    0.92
## prior_b_sAge             -0.01    0.00 0.50   -0.98   -0.35   -0.01    0.33
## prior_b_breedckcs         0.01    0.01 1.00   -1.96   -0.66    0.02    0.68
## prior_b_breedpug         -0.01    0.01 0.99   -1.97   -0.68    0.00    0.66
## prior_b_breedret          0.01    0.01 1.00   -1.96   -0.66    0.01    0.68
## prior_b_breedother        0.01    0.01 1.01   -1.99   -0.67    0.00    0.69
## prior_b_sexMale           0.01    0.01 1.00   -1.95   -0.67    0.02    0.69
## prior_Intercept_sigma     0.00    0.03 4.33   -7.78   -1.92    0.00    1.90
## prior_alpha               3.99    0.01 2.01    0.10    2.62    3.99    5.36
## prior_sd_visit            0.80    0.00 0.60    0.03    0.32    0.68    1.15
## lprior                  -10.72    0.01 0.66  -12.51  -10.99  -10.56  -10.27
## lp__                   -110.79    0.03 3.02 -117.59 -112.62 -110.45 -108.61
## z_1[1,1]                  0.04    0.01 0.77   -1.49   -0.44    0.04    0.52
## z_1[1,2]                 -0.03    0.01 0.78   -1.60   -0.52   -0.02    0.46
##                          97.5% n_eff Rhat
## b_Intercept               0.16 10764    1
## b_sigma_Intercept        -0.18 12556    1
## b_comorbidityyes          1.11 14997    1
## b_sAge                    0.07 17782    1
## b_breedckcs               0.76 18389    1
## b_breedpug                0.78 12216    1
## b_breedret                0.50 13993    1
## b_breedother              0.81 12112    1
## b_sexMale                 0.49 19605    1
## b_sigma_comorbidityyes    1.23 14658    1
## sd_visit__Intercept       1.31 11664    1
## alpha                     6.00 15474    1
## Intercept                 0.50  9600    1
## Intercept_sigma           0.14 17418    1
## r_visit[v0,Intercept]     0.53  9566    1
## r_visit[v1,Intercept]     0.51  9924    1
## prior_Intercept           0.99 24016    1
## prior_b_comorbidityyes    2.20 23516    1
## prior_b_sAge              0.98 23872    1
## prior_b_breedckcs         1.95 23665    1
## prior_b_breedpug          1.94 24453    1
## prior_b_breedret          1.99 24317    1
## prior_b_breedother        2.00 23671    1
## prior_b_sexMale           1.97 23978    1
## prior_Intercept_sigma     7.78 23398    1
## prior_alpha               7.91 23876    1
## prior_sd_visit            2.25 24697    1
## lprior                   -9.97 11825    1
## lp__                   -105.87  7797    1
## z_1[1,1]                  1.63 13136    1
## z_1[1,2]                  1.52 13315    1
## 
## Samples were drawn using NUTS(diag_e) at Sat Jul  5 23:54:16 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

c. Parameters versus their priors

i. Intercept

mcmc_plot(model2,
          variable = c("Intercept", "prior_Intercept",
         "b_Intercept"))

ii. sigma_Intercept

mcmc_plot(model2,
          variable = c("b_sigma_Intercept", "prior_Intercept_sigma"))

iii. all betas versus their priors

mcmc_plot(model2,
          variable = c("b_comorbidityyes", "prior_b_comorbidityyes",
                       "b_sAge", "prior_b_sAge",
                       "b_sexMale", "prior_b_sexMale",
                       "b_breedckcs", "prior_b_breedckcs",
                       "b_breedpug", "prior_b_breedpug",
                       "b_breedret", "prior_b_breedret",
                       "b_breedother", "prior_b_breedother"))

comorbidity vs prior
1. distributional
mcmc_plot(model2,
          variable = c("b_comorbidityyes", "prior_b_comorbidityyes")) +


   theme_classic() +
    labs(title = "Prior vs posterior distribution for comorbidity") +
         labs(y = "") +
         labs(x = paste0("Possible parameter values")) +
    scale_y_discrete(labels=c("prior_b_comorbidityyes" = "Prior", "b_comorbidityyes" = "Posterior"),
                     limits = c("prior_b_comorbidityyes", "b_comorbidityyes")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey8",size = 12))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

2. density
mcmc_plot(model2,
          variable = c("b_comorbidityyes", "prior_b_comorbidityyes"),
          type = "areas") +

   theme_classic() +
    labs(title = "Prior vs posterior distribution for comorbidity") +
         labs(y = "") +
         labs(x = paste0("Possible parameter values")) +
    scale_y_discrete(labels=c("prior_b_comorbidityyes" = "Prior", "b_comorbidityyes" = "Posterior"),
                     limits = c("prior_b_comorbidityyes", "b_comorbidityyes")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey8",size = 12))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

iv. sAge

mcmc_plot(model2,
          variable = c("b_sAge", "prior_b_sAge"))

v. sexMale

mcmc_plot(model2,
          variable = c("b_sexMale", "prior_b_sexMale"))

vi. breedckcs

mcmc_plot(model2,
          variable = c("b_breedckcs", "prior_b_breedckcs"))

v. breedpug

mcmc_plot(model2,
          variable = c("b_breedpug", "prior_b_breedpug"))

vi. breedret

mcmc_plot(model2,
          variable = c("b_breedret", "prior_b_breedret"))

vii. breedother

mcmc_plot(model2,
          variable = c("b_breedother", "prior_b_breedother"))

3. plot all posterior distributions

posterior <- as.matrix(model2)
mcmc_areas(posterior,
    pars = c("b_Intercept",
         "b_sigma_Intercept",
         "alpha",
         "b_comorbidityyes",
         "b_sAge",
         "b_sexMale",
         "b_breedckcs",
         "b_breedpug",
         "b_breedret",
         "b_breedother",
         "b_sigma_comorbidityyes"),
    prob = 0.75) # arbitrary threshold for shading probability mass

4. plot posterior distribution for comorbidity

posterior <- as.matrix(model2)
mcmc_areas(posterior,
    pars = c("b_comorbidityyes",
             "b_sigma_comorbidityyes"),
    prob = 0.97) +  # arbitrary threshold for shading probability mass 
 
   theme_classic() +
     labs(title = "Posterior distribution for comorbidity parameters", 
         y = "Density distribution", 
         x = "Possible parameter values") +
    scale_y_discrete(labels=c("b_comorbidityyes" = "Beta", "b_sigma_comorbidityyes" = "Sigma"),
                     limits=c("b_sigma_comorbidityyes","b_comorbidityyes")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey8",size = 12))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

5. Describe the posterior visually

a. all beta parameters

# Focus on describing posterior
hdi_range <- hdi(model2, ci = c(0.65, 0.70, 0.80, 0.89, 0.95))
plot(hdi_range, show_intercept = T)

b. beta for comorbidity only

# Focus on describing posterior
hdi_range <- hdi(model2, ci = c(0.65, 0.70, 0.80, 0.89, 0.97), parameters = "comorbidityyes")
plot(hdi_range, show_intercept = T) +

    labs(title = "Posterior distribution for comorbidity") +
         labs(y = "Density distribution") +
         labs(x = "Possible parameter values") +
           theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5),
               axis.text.x = element_text(color = "grey50", size = 12),
               axis.text.y = element_text(color = "grey8",size = 12))

POSTERIOR PREDICTION

1. posterior prediction for effect of comorbidity on Log cortisol

(when visit = vO, breed = mix, sAge = 0 (mean), sex = Female)

# create new dataframe which contains results of the 10th dog
new_data <- rbind(df2[10,], df2[10,])
# Now change one category to be different
new_data$comorbidity <- c("yes", "no")
# Visualise df to make sure it has worked
new_data
##     number   group visit season breed_group coat_colour  sex age comorbidity
## 12     c12 stopped    v0 autumn        ckcs        dark Male  92         yes
## 121    c12 stopped    v0 autumn        ckcs        dark Male  92          no
##     fat_percent cortisol      lgCort weight_loss_success breed       sFat
## 12     34.01262 1.001929 0.001927641                  no  ckcs -0.8268174
## 121    34.01262 1.001929 0.001927641                  no  ckcs -0.8268174
##           sAge    slgCort
## 12  -0.1081948 -0.9764601
## 121 -0.1081948 -0.9764601
# Now get mean predictions from the draws of the model
pred_means <- posterior_predict(model2, newdata = new_data)


# Compare difference in means for each breed versus mix
difference <- pred_means[,1] - pred_means[,2]

par(mfrow = c(2,2))

# Examine mean of difference for comorbidity
mean(difference)
## [1] 0.6496641
# View histogram of this
dens(difference)
# Create HPDI
HPDI(difference, 0.97)
##     |0.97     0.97| 
## -2.028449  3.554991

HYPOTHESIS TESTS

1. hypothesis test to check if mean association between cortisol and comborbidity is >0

draws <- as.matrix(model2)
mean(draws[,3] >0)
## [1] 0.994375

2. check 97% credible interval of with HPDI for comorbidity from draws

HPDI(draws[,3], prob=0.97)
##     |0.97     0.97| 
## 0.1336725 1.1887329

3. hypothesis test that the effect of comorbidity is positive

hypothesis(model2, "comorbidityyes >0")
## Hypothesis Tests for class b:
##             Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (comorbidityyes) > 0     0.65      0.24     0.25     1.03     176.78
##   Post.Prob Star
## 1      0.99    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
hypothesis(model2, "comorbidityyes <0")
## Hypothesis Tests for class b:
##             Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (comorbidityyes) < 0     0.65      0.24     0.25     1.03       0.01
##   Post.Prob Star
## 1      0.01     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

VISUALISING PREDICTIONS

1. visualising the posterior predictions from the model

# create new dataframe which contains results of all dogs
new_data1 <- df2
# Now change one category to be different
new_data1$comorbidity <- c("yes")

# create a second new dataframe which contains results of all dogs
new_data2 <- df2
# Now change one category to be different
new_data2$comorbidity <- c("no")

# Now get predictions from the draws of the models
pred_nd1 <- posterior_predict(model2, newdata = new_data1)
pred_nd2 <- posterior_predict(model2, newdata = new_data2)
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)

# Create mean of differences for each column (dog) of the dataframe
pred_diff_mean <- apply(pred_diff, 2, mean)
# View histogram of mean differences
hist(pred_diff_mean)

# mean difference
mean(pred_diff_mean)
## [1] 0.6459975
# Create HPDI
HPDI(pred_diff_mean, 0.97)
##     |0.97     0.97| 
## 0.6350587 0.6582279

2. make predictions of log cortisol for each dog and compare with actual data

pred_slgCort <- posterior_epred(model2)
av_pred_slgCort <- colMeans(pred_slgCort)
plot(av_pred_slgCort ~ df$slgCort)

3. plot the counterfactual effect of “do comorbidity” on slgCort

a. plot estimates and 95% credible intervals

set.seed(666)
nd <- tibble(comorbidity = c('no', 'yes'), visit = "v0", sAge = 0, sex = "Female", breed = "mix", )

p1 <-
  predict(model2,
          resp = "slgCort",
          newdata = nd) %>% 
  data.frame() %>% 
  bind_cols(nd) %>% 
  
  ggplot(aes(x = comorbidity, y = Estimate, ymin = Q2.5, ymax = Q97.5)) +
  
  geom_linerange(aes(ymin = Q2.5, ymax = Q97.5),
                 linewidth = 2, color = c("#F8766D", "#00BFC4"), alpha = 3/5) +
  geom_point(size = 8, color = c("#F8766D", "#00BFC4")) +

   theme_bw() +
    labs(title = "Log hair cortisol (standardised)") +
         labs(y = paste0("Log hair cortisol (standardised)")) +
         labs(x = paste0("Comorbidity")) +
         theme(axis.title.y = element_text(size=12, face="bold"), 
               axis.title.x = element_text(size=12, face="bold"),
               title = element_text(size=12, face="bold"),
               plot.title = element_text(hjust = 0.5))

plot(p1)

Citations and package versions for packages

citation()
## To cite R in publications use:
## 
##   R Core Team (2025). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2025},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.
citation("dlookr")
## To cite package 'dlookr' in publications use:
## 
##   Ryu C (2024). _dlookr: Tools for Data Diagnosis, Exploration,
##   Transformation_. R package version 0.6.3,
##   <https://CRAN.R-project.org/package=dlookr>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {{dlookr}: Tools for Data Diagnosis, Exploration, Transformation},
##     author = {Choonghyun Ryu},
##     year = {2024},
##     note = {R package version 0.6.3},
##     url = {https://CRAN.R-project.org/package=dlookr},
##   }
packageVersion("dlookr")
## [1] '0.6.3'
citation("dplyr")
## To cite package 'dplyr' in publications use:
## 
##   Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A
##   Grammar of Data Manipulation_. R package version 1.1.4,
##   <https://CRAN.R-project.org/package=dplyr>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {dplyr: A Grammar of Data Manipulation},
##     author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan},
##     year = {2023},
##     note = {R package version 1.1.4},
##     url = {https://CRAN.R-project.org/package=dplyr},
##   }
packageVersion("dplyr")
## [1] '1.1.4'
citation("ggplot2")
## To cite ggplot2 in publications, please use
## 
##   H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
##   Springer-Verlag New York, 2016.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     author = {Hadley Wickham},
##     title = {ggplot2: Elegant Graphics for Data Analysis},
##     publisher = {Springer-Verlag New York},
##     year = {2016},
##     isbn = {978-3-319-24277-4},
##     url = {https://ggplot2.tidyverse.org},
##   }
packageVersion("ggplot2")
## [1] '3.5.2'
citation("psych")
## To cite package 'psych' in publications use:
## 
##   William Revelle (2025). _psych: Procedures for Psychological,
##   Psychometric, and Personality Research_. Northwestern University,
##   Evanston, Illinois. R package version 2.5.3,
##   <https://CRAN.R-project.org/package=psych>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {psych: Procedures for Psychological, Psychometric, and Personality Research},
##     author = {{William Revelle}},
##     organization = {Northwestern University},
##     address = {Evanston, Illinois},
##     year = {2025},
##     note = {R package version 2.5.3},
##     url = {https://CRAN.R-project.org/package=psych},
##   }
packageVersion("psych")
## [1] '2.5.3'
citation("reshape")
## To cite reshape in publications, please use:
## 
##   H. Wickham. Reshaping data with the reshape package. Journal of
##   Statistical Software, 21(12), 2007.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     author = {Hadley Wickham},
##     journal = {Journal of Statistical Software},
##     number = {12},
##     title = {Reshaping data with the reshape package},
##     url = {https://www.jstatsoft.org/v21/i12/},
##     volume = {21},
##     year = {2007},
##   }
packageVersion("reshape")
## [1] '0.8.9'
citation("readxl")
## To cite package 'readxl' in publications use:
## 
##   Wickham H, Bryan J (2025). _readxl: Read Excel Files_. R package
##   version 1.4.5, <https://CRAN.R-project.org/package=readxl>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {readxl: Read Excel Files},
##     author = {Hadley Wickham and Jennifer Bryan},
##     year = {2025},
##     note = {R package version 1.4.5},
##     url = {https://CRAN.R-project.org/package=readxl},
##   }
packageVersion("readxl")
## [1] '1.4.5'
citation("tidyverse")
## To cite package 'tidyverse' in publications use:
## 
##   Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R,
##   Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller
##   E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V,
##   Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to
##   the tidyverse." _Journal of Open Source Software_, *4*(43), 1686.
##   doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Welcome to the {tidyverse}},
##     author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
##     year = {2019},
##     journal = {Journal of Open Source Software},
##     volume = {4},
##     number = {43},
##     pages = {1686},
##     doi = {10.21105/joss.01686},
##   }
packageVersion("tidyverse")
## [1] '2.0.0'
citation("vioplot")
## To cite the enhanced vioplot package in publications use:
## 
##   Daniel Adler, S. Thomas Kelly, Tom Elliott, and Jordan Adamson
##   (2025). vioplot: violin plot. R package version 0.5.1
##   https://github.com/TomKellyGenetics/vioplot
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {vioplot: violin plot},
##     author = {Daniel Adler and S. Thomas Kelly and Tom Elliott and Jordan Adamson},
##     year = {2025},
##     note = {R package version 0.5.1},
##     url = {https://github.com/TomKellyGenetics/vioplot},
##   }
## 
## Please also acknowledge the original package: citation("vioplot")
packageVersion("vioplot")
## [1] '0.5.1'
citation("vtable")
## To cite package 'vtable' in publications use:
## 
##   Huntington-Klein N (2024). _vtable: Variable Table for Variable
##   Documentation_. R package version 1.4.8,
##   <https://CRAN.R-project.org/package=vtable>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {vtable: Variable Table for Variable Documentation},
##     author = {Nick Huntington-Klein},
##     year = {2024},
##     note = {R package version 1.4.8},
##     url = {https://CRAN.R-project.org/package=vtable},
##   }
packageVersion("vtable")
## [1] '1.4.8'
citation("brms")
## To cite brms in publications use:
## 
##   Paul-Christian Bürkner (2017). brms: An R Package for Bayesian
##   Multilevel Models Using Stan. Journal of Statistical Software, 80(1),
##   1-28. doi:10.18637/jss.v080.i01
## 
## Paul-Christian Bürkner (2018). Advanced Bayesian Multilevel Modeling
## with the R Package brms. The R Journal, 10(1), 395-411.
## doi:10.32614/RJ-2018-017
## 
## Paul-Christian Bürkner (2021). Bayesian Item Response Modeling in R
## with brms and Stan. Journal of Statistical Software, 100(5), 1-54.
## doi:10.18637/jss.v100.i05
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
packageVersion("brms")
## [1] '2.22.0'
citation("bayesplot")
## To cite the bayesplot R package:
## 
##   Gabry J, Mahr T (2025). "bayesplot: Plotting for Bayesian Models." R
##   package version 1.12.0, <https://mc-stan.org/bayesplot/>.
## 
## To cite the bayesplot paper 'Visualization in Bayesian workflow':
## 
##   Gabry J, Simpson D, Vehtari A, Betancourt M, Gelman A (2019).
##   "Visualization in Bayesian workflow." _J. R. Stat. Soc. A_, *182*,
##   389-402. doi:10.1111/rssa.12378 <https://doi.org/10.1111/rssa.12378>.
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
packageVersion("bayesplot")
## [1] '1.12.0'
citation("bayestestR")
## To cite bayestestR in publications use:
## 
##   Makowski, D., Ben-Shachar, M., & Lüdecke, D. (2019). bayestestR:
##   Describing Effects and their Uncertainty, Existence and Significance
##   within the Bayesian Framework. Journal of Open Source Software,
##   4(40), 1541. doi:10.21105/joss.01541
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {bayestestR: Describing Effects and their Uncertainty, Existence and Significance within the Bayesian Framework.},
##     author = {Dominique Makowski and Mattan S. Ben-Shachar and Daniel Lüdecke},
##     journal = {Journal of Open Source Software},
##     doi = {10.21105/joss.01541},
##     year = {2019},
##     number = {40},
##     volume = {4},
##     pages = {1541},
##     url = {https://joss.theoj.org/papers/10.21105/joss.01541},
##   }
packageVersion("bayestestR")
## [1] '0.16.0'
citation("rethinking")
## To cite package 'rethinking' in publications use:
## 
##   McElreath R (2024). _rethinking: Statistical Rethinking book
##   package_. R package version 2.42, commit
##   ac1b3b2cda83f3e14096e2d997a6e30ad109eeee,
##   <https://github.com/rmcelreath/rethinking>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {rethinking: Statistical Rethinking book package},
##     author = {Richard McElreath},
##     year = {2024},
##     note = {R package version 2.42, commit ac1b3b2cda83f3e14096e2d997a6e30ad109eeee},
##     url = {https://github.com/rmcelreath/rethinking},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
packageVersion("rethinking")
## [1] '2.42'
citation("loo")
## To cite the loo R package:
## 
##   Vehtari A, Gabry J, Magnusson M, Yao Y, Bürkner P, Paananen T, Gelman
##   A (2024). "loo: Efficient leave-one-out cross-validation and WAIC for
##   Bayesian models." R package version 2.8.0,
##   <https://mc-stan.org/loo/>.
## 
## To cite the loo paper:
## 
##   Vehtari A, Gelman A, Gabry J (2017). "Practical Bayesian model
##   evaluation using leave-one-out cross-validation and WAIC."
##   _Statistics and Computing_, *27*, 1413-1432.
##   doi:10.1007/s11222-016-9696-4
##   <https://doi.org/10.1007/s11222-016-9696-4>.
## 
## To cite the stacking paper:
## 
##   Yao Y, Vehtari A, Simpson D, Gelman A (2017). "Using stacking to
##   average Bayesian predictive distributions." _Bayesian Analysis_.
##   doi:10.1214/17-BA1091 <https://doi.org/10.1214/17-BA1091>.
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
packageVersion("loo")
## [1] '2.8.0'
citation("priorsense")
## To cite the priorsense package and power-scaling sensitivity:
## 
##   Kallioinen, N., Paananen, T., Bürkner, P-C., Vehtari, A. (2023).
##   Detecting and diagnosing prior and likelihood sensitivity with
##   power-scaling. Statistics and Computing. 34(57).
##   doi:10.1007/s11222-023-10366-5
## 
## To cite Pareto-smoothed importance sampling:
## 
##   Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao, Jonah Gabry
##   (2024). Pareto smoothed importance sampling. Journal of Machine
##   Learning Research 25(72) https://jmlr.org/papers/v25/19-556.html
## 
## To cite importance weighted moment matching:
## 
##   Topi Paananen, Juho Piironen, Paul-Christian Bürkner, Aki Vehtari
##   (2021). Implicitly adaptive importance sampling. Statistics and
##   Computing, 31(16), 1-19. doi:10.1007/s11222-020-09982-2
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
packageVersion("priorsense")
## [1] '1.1.0'